# Load the tidy lizard observations (path is resolved relative to the project root)
lizards <- read_csv(file = here("data_tidy", "lizards.csv"))
## Rows: 1628 Columns: 16
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (10): date, scientific_name, common_name, zone, site, plot, spp, sex, rc...
## dbl  (6): pit, toe_num, sv_length, total_length, weight, pc
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# type `here()` in the console to determine where "here" is

Histogram (really bad) of lizard weights

# A histogram maps a single variable (here: weight) to the x-axis
ggplot(lizards, mapping = aes(x = weight)) +
  geom_histogram(
    fill = "orange",      # interior of the bars
    color = "blue",       # bar outline
    size = 2,             # outline width
    linetype = "dotted"   # outline style
  )
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Color refers to points and lines. Fill refers to polygons
# Size of polygon refers to the width of the border

Scatter Plot

# Scatter plot: total_length on the x-axis, weight on the y-axis.
# Each shape number (0-25) corresponds to a different point shape; some of
# them take a fill color. ASCII codes can be used as well.

ggplot(lizards, mapping = aes(x = total_length, y = weight)) +
  geom_point(
    shape = 22,
    fill = "yellow",
    color = "red",
    size = 3,
    alpha = 0.7 # transparency: 0 = completely transparent, 1 = opaque
  )

Bubble plot (take a point and change its size based on a variable)

# Bubble plot: point color varies with common_name and point size varies
# with total_length.
# DISCLAIMER: BAD IDEA

ggplot(lizards, mapping = aes(x = total_length, y = weight)) +
  geom_point(
    mapping = aes(color = common_name, size = total_length),
    shape = 22,
    fill = "black"
  ) +
  theme_light()

Facet grid vs. facet wrap #### facet wrap is a bunch of graphs in a row that wraps below if needed for space

# One panel per species; `~` reads as "by", and panels wrap onto new rows
# after `ncol` columns
ggplot(lizards, mapping = aes(x = total_length, y = weight)) +
  geom_point(mapping = aes(color = common_name)) +
  theme_light() +
  facet_wrap(~common_name, ncol = 4)

# you can change the number of columns in facet wrap. fills up first row first, then goes onto the next row

Facet Grid

# Grid of panels: the formula is rows ~ columns (sex in rows, tail in columns)
ggplot(lizards, mapping = aes(x = total_length, y = weight)) +
  geom_point(mapping = aes(color = common_name)) +
  facet_grid(sex ~ tail)

# populates with data at each intersection

Getting things in order (default is alphabetical)

# Total number of observations per common_name, the long way:
# group, then summarise with n()
lizard_count <- lizards %>%
  group_by(common_name) %>%
  summarise(count = n())

# Shortcut: count() groups and tallies in one step (the tally column is `n`)
lizard_count2 <- dplyr::count(lizards, common_name)
# count() also accepts several grouping variables
lizard_count_name_tail <- dplyr::count(lizards, common_name, tail)

# Make common_name a factor whose levels are ordered by the values of another
# variable (here: count) instead of the default alphabetical order. Put a
# negative sign in front of the other variable to flip the order.

# `fct_reorder` = factor reorder
ggplot(data = lizard_count, aes(y = fct_reorder(common_name, count), x = count)) +
  geom_col(aes(fill = common_name), show.legend = FALSE) +
  # Counts are on the x-axis and species are on the y-axis, so the y label
  # must name the species, not the counts
  labs(x = "Lizard counts",
       y = "Common name")

# switched y and x to make it more readable or use `coord_flip()`

Try converting common name to an ordered factor outside of ggplot

# Same reordering as in the plot, but stored in the data frame itself:
# common_name becomes a factor with levels ordered by count
common_name_factor <- mutate(
  lizard_count,
  common_name = fct_reorder(common_name, count)
)

# Inspect the resulting level order
levels(common_name_factor$common_name)
## [1] "texas horned"         "eastern fence"        "colorado checkered"  
## [4] "lesser earless"       "little striped"       "western whiptail"    
## [7] "side-blotched lizard"

Axis scale breaks, limits and labels

Scatterplot: total_length (x) vs. weight (y)

# Custom axis breaks and limits; expand = c(0, 0) removes the padding that is
# normally added around the limits
ggplot(lizards, mapping = aes(x = total_length, y = weight)) +
  geom_point() +
  scale_x_continuous(
    breaks = c(0, 10, 50, 100, 500), # hand-written break vector
    limits = c(0, 500),
    expand = c(0, 0)
  ) +
  scale_y_continuous(
    breaks = seq(0, 70, by = 10),    # generated break vector
    limits = c(0, 70),
    expand = c(0, 0)
  )

# breaks are the tick-mark positions. the vector can be written out manually or generated as a sequence
# limits are min and max value

Transform the date column to class Date, then find counts of observations by date

# Parse the month-day-year strings into Date objects, then tally observations
# per date. (`lubridate::` is only needed when lubridate is not attached.)
lizard_obs_count <- lizards %>%
  mutate(date = lubridate::mdy(date)) %>%
  count(date) %>%
  mutate(count = n) # keep `n` and add a copy of it named `count`

# Confirm the conversion worked
class(lizard_obs_count$date)
## [1] "Date"

Make a line plot (geom_line()) of date (x) and count (y)

# In date_labels, lowercase `%y` is a 2-digit year and uppercase `%Y` is a
# 4-digit year
ggplot(lizard_obs_count, mapping = aes(x = date, y = count)) +
  geom_line() +
  scale_x_date(
    date_breaks = "3 years",
    date_labels = "%Y"
  )

Color Scales

# scale_color_gradient: continuous scale between exactly two colors
ggplot(lizards, mapping = aes(x = total_length, y = weight)) +
  geom_point(mapping = aes(color = weight)) +
  scale_color_gradient(low = "red", high = "navy")

# scale_color_gradientn: continuous scale through any number of colors
ggplot(lizards, mapping = aes(x = total_length, y = weight)) +
  geom_point(mapping = aes(color = weight)) +
  scale_color_gradientn(
    colors = c("orange", "cyan", "chartreuse", "black")
  )

# scale_color_stepsn: binned scale with explicit breaks between colors
ggplot(lizards, mapping = aes(x = total_length, y = weight)) +
  geom_point(mapping = aes(color = weight)) +
  scale_color_stepsn(
    colors = c("green", "blue", "purple"),
    breaks = c(0, 20, 40) # if you give it more breaks than colors, it will create a gradient between colors
  )

#### Update a color scheme using a palette in paletteer Make a horizontal boxplot with common_name on the y-axis, total_length on the x-axis, with color changing based on common_name

# Order the species by their median total_length so the boxes appear sorted
lizards_fct <- mutate(
  lizards,
  common_name = fct_reorder(common_name, total_length, .fun = median)
)

# Horizontal boxplot, filled by species with a discrete paletteer palette
# (palette strings have the form "package::palette")
ggplot(lizards_fct, mapping = aes(y = common_name, x = total_length)) +
  geom_boxplot(mapping = aes(fill = common_name), show.legend = FALSE) +
  scale_fill_paletteer_d(palette = "beyonce::X10")

View(palettes_d_names): note the package name and the palette name. Themes

# Deliberately loud theme to show which element controls what.
# `panel.*` is the area where the data is drawn; `plot.*` is the larger
# background around it.
ggplot(lizards, mapping = aes(x = total_length, y = weight)) +
  geom_point() +
  theme(
    plot.background = element_rect(fill = "cyan4"),
    panel.background = element_rect(fill = "yellow", color = "purple", size = 10),
    # general grid setting first, then the per-axis settings;
    # the most specific setting wins
    panel.grid.major = element_line(color = "blue"),
    panel.grid.major.x = element_line(color = "red", size = 2),
    panel.grid.major.y = element_line(color = "orange", size = 1),
    axis.text.x = element_text(color = "orange"),
    axis.title = element_text(color = "yellow", size = 12)
  )

# use `panel.grid = element_blank()` for no grid lines  
# panel is about where the data is shown
# plot is the larger background

ggrepel

great for adding labels to things

## make a subset from lizards, called ww_lizards, that only contains observations for "western whiptail" lizards from the site "sand"

# Keep only western whiptails observed at the "sand" site
ww_lizards <- filter(
  lizards,
  common_name == "western whiptail",
  site == "sand"
)

# Label each point with its toe number; ggrepel moves the labels apart so
# they do not overlap
ggplot(ww_lizards, mapping = aes(x = total_length, y = weight)) +
  geom_point() +
  geom_text_repel(mapping = aes(label = toe_num), size = 3, max.overlaps = 20)

# View(gapminder) to inspect the dataset
# Keep only European countries in 2007, then draw a scatterplot of
# gdpPercap (x) against lifeExp (y), labeled by country name

gapminder_sub <- filter(
  gapminder,
  continent == "Europe",
  year == 2007
)

ggplot(gapminder_sub, mapping = aes(x = gdpPercap, y = lifeExp)) +
  geom_point() +
  geom_text_repel(mapping = aes(label = country), size = 3, max.overlaps = 20)

## gghighlight allows you to specify conditions where points or conditions will be highlighted from others

# Base scatterplot, stored so it can be reused and extended
p <- ggplot(data = lizards, mapping = aes(x = total_length, y = weight)) +
  geom_point()

# A stored plot can take further layers or themes
p + theme_dark()

# Highlight only the observations whose toe number is 250 and label them
# by toe_num
p + gghighlight(toe_num == 250, label_key = toe_num)

# One line per species; highlight the species whose maximum weight exceeds 30
q <- ggplot(lizards, mapping = aes(x = total_length, y = weight)) +
  geom_line(mapping = aes(color = common_name)) +
  gghighlight(max(weight) > 30)
## label_key: common_name
q

## patchwork for compound figures the job of patchwork is to make it easier to apply common themes to multiple figures in a graphic

# Patchwork layout operators (only available when patchwork is attached):
# `|` places plots side by side and `/` stacks them. Use parentheses to
# control grouping, as with order of operations in arithmetic.
p | q

(p | q) / q

# `*` is not a layout operator: it adds a ggplot element (such as a theme) to
# the plots at the current nesting level, and it rejects a patchwork on its
# right-hand side. Build the layout with `|` and `/` instead.
# The outer parentheses matter: in R, `&` binds tighter than `|`, so without
# them the theme would only be applied to the right-hand group.
((p | q) | (p / q)) &
  theme_minimal()

## A few new graph types ### Marginal plots (from ggExtra)

# Western whiptails with both measurements present
whiptails <- lizards %>%
  filter(common_name == "western whiptail") %>%
  drop_na(total_length, weight)

# Sanity check: unique(whiptails$common_name)

# Scatterplot with rug marks drawn along the axes
ggplot(whiptails, mapping = aes(x = total_length, y = weight)) +
  geom_point() +
  geom_rug()

Marginal plot with boxplots

# Scatterplot colored by sex, with manual colors and legend labels and the
# legend placed under the plot
my_plot <- ggplot(whiptails, mapping = aes(x = total_length, y = weight)) +
  geom_point(mapping = aes(color = sex), size = 2) +
  scale_color_manual(
    name = "Sex:",
    values = c("cyan4", "black", "goldenrod"),
    labels = c("female", "juvenile", "male")
  ) +
  theme_minimal() +
  theme(legend.position = "bottom")

# Add boxplots in the margins, colored by the same grouping as the points
ggMarginal(my_plot, type = "boxplot", groupColour = TRUE)

## ggMarginal also works with histograms and density plots

Beeswarm plot with ggbeeswarm

# Points as a beeswarm, with unfilled violin and boxplot outlines drawn on top
ggplot(whiptails, mapping = aes(x = sex, y = weight)) +
  geom_beeswarm() +
  geom_violin(fill = NA) +
  geom_boxplot(fill = NA)

### A heatmap with geom_tile()

# Build lizard_count2 from lizards: convert date to class Date, extract the
# year, then count observations by year and common name. Rows with any
# missing value are dropped.

lizard_count2 <- lizards %>%
  mutate(date = lubridate::mdy(date)) %>%
  mutate(year = lubridate::year(date)) %>%
  count(year, common_name) %>%
  drop_na()
# alternative: compute the year inside count(), i.e. count(year = lubridate::year(date), common_name)
# alternative: group_by() followed by summarize(n = n())

# Heatmap: one tile per year/species, filled by count and annotated with it
ggplot(lizard_count2, mapping = aes(x = year, y = common_name)) +
  geom_tile(mapping = aes(fill = n), show.legend = FALSE) +
  geom_text(mapping = aes(label = n), color = "white", size = 3) +
  scale_fill_gradientn(colors = c("navy", "red", "orange")) +
  theme_minimal() +
  labs(x = "Year", y = "Lizard common name")

# scale_fill_gradientn() lets you pick as many colors as you want;
# scale_fill_viridis_c() is a color-blind-friendly continuous alternative
ggplot(lizard_count2, mapping = aes(x = year, y = common_name)) +
  geom_tile(mapping = aes(fill = n), show.legend = FALSE) +
  geom_text(mapping = aes(label = n), color = "white", size = 3) +
  scale_fill_viridis_c()

## Make a map! Use read_sf to read in the “doc.kml” file

# Run `here()` in the console to see which folder paths are resolved from
vegetation_path <- here("data_raw", "spatial_vegetation", "doc.kml")

# Read the spatial layer, keep only the Name column, and clean the column names
jornada_vegetation <- read_sf(vegetation_path) %>%
  select(Name) %>%
  clean_names()

# Map the polygons filled by vegetation name, without polygon outlines
ggplot(jornada_vegetation) +
  geom_sf(mapping = aes(fill = name), color = NA) +
  scale_fill_paletteer_d(palette = "ggthemes::manyeys") +
  labs(
    x = "Longitude",
    y = "Latitude",
    fill = "Dominant vegetation:"
  )

# `scale_fill_paletteer_d()` is the discrete variant, used here because the
# vegetation names are discrete values rather than continuous ones
jornada_vegetation <- here("data_raw", "spatial_vegetation", "doc.kml") %>%
  read_sf() %>%
  select(Name) %>%
  clean_names()

# Same map, with theme_void() in place of custom axis and legend titles
ggplot(jornada_vegetation) +
  geom_sf(mapping = aes(fill = name), color = NA) +
  scale_fill_paletteer_d(palette = "ggthemes::manyeys") +
  theme_void()

Alt Text

set this up at the top of the code chunk

library(palmerpenguins)

# Histogram of body mass, one panel per penguin species
ggplot(data = penguins, mapping = aes(x = body_mass_g)) +
  geom_histogram() +
  facet_wrap(~species)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 2 rows containing non-finite values (stat_bin).

a histogram showing the distribution of body mass in Adelie, Chinstrap, and Gentoo penguins